import requests
from lxml import etree
import json
import time

# Sunshine Hotline Q&A platform (wz.sun0769.com) scraper
class Sun():
    """Scraper for the Sunshine Hotline Q&A platform (wz.sun0769.com).

    Fetches the first 5 listing pages of question type 4, extracts the
    title / link / post time of each table row, and appends the rows as
    JSON lines to a local text file.
    """

    def __init__(self):
        # Listing pages are paginated via a 30-item offset in the "page" query arg.
        self.start_url = "http://wz.sun0769.com/index.php/question/questionType?type=4&page={}"

        # BUG FIX: the header key was 'User - Agent' (spaces around the
        # hyphen), so no valid User-Agent header was ever sent; the UA value
        # was also mangled ('Mozilla / 5.0(...likeGecko...'). Both are now
        # well-formed.
        self.headers = {
            'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) '
                           'Chrome/75.0.3770.90 Safari/537.36')
            }

    def get_url_list(self):
        """Return the 5 listing-page URLs (offsets 0, 30, ..., 120)."""
        return [self.start_url.format(page * 30) for page in range(5)]

    def parse_url(self, url):
        """GET *url* and return the page parsed as an lxml element tree.

        The site serves GBK-encoded pages, so the raw bytes are decoded
        explicitly instead of trusting requests' charset detection.
        """
        response = requests.get(url, headers=self.headers)
        text = response.content.decode('gbk')
        return etree.HTML(text)

    def get_content_list(self, html):
        """Extract one dict per table row inside div.greyframe.

        NOTE: xpath() returns lists, so every value is a (possibly empty)
        list of strings; rows without a 'news14' anchor (e.g. header rows)
        yield dicts of empty lists — this mirrors the original output format.
        """
        content_list = []
        for row in html.xpath('//div[@class="greyframe"]//tr'):
            item = {}
            item["标题"] = row.xpath('.//a[@class="news14"]/text()')
            item['URL'] = row.xpath('.//a[@class="news14"]/@href')
            item['时间'] = row.xpath('.//td[@class="t12wh"]/text()')
            content_list.append(item)
        return content_list

    def save_content_list(self, content_list):
        """Append each extracted item as one JSON line to the output file."""
        with open('阳光热线问政平台.txt', 'a', encoding='utf-8') as f:
            for content in content_list:
                # ensure_ascii=False keeps the Chinese keys/values readable.
                f.write(json.dumps(content, ensure_ascii=False))
                f.write('\n')

    def run(self):
        """Fetch every listing page, parse it, and persist the rows."""
        for url in self.get_url_list():
            html = self.parse_url(url)
            content_list = self.get_content_list(html)
            self.save_content_list(content_list)


if __name__ == '__main__':
    # Entry point: scrape all listing pages and save the results.
    Sun().run()
